library(readstata13)
library(zoo)
library(foreign)

setwd("C:/Users/feb220/Dropbox/Party strength/Empirics/Data")

### Party Switching ###
# Party Switching has no MM posterior, so we generate a posterior-like matrix
# by drawing from a normal distribution around each point estimate.
# Produces `prswt` (country_text_id, year, v2psswitch1..v2psswitch900) and
# writes it to "v2psswitch.posterior_clean.csv". `vdem` is kept in the
# workspace because later sections read from it.
vdem <- read.dta13("vdem_6.2.dta")
vars <- c("country_text_id", "year", "v2psswitch",
          "v2psswitch_codelow", "v2psswitch_codehigh")
pssw <- vdem[vars]

# The standard deviation is approximated by the mean distance from the point
# estimate to the two interval bounds, i.e. half the codelow-codehigh width.
pssw$sd <- (pssw$v2psswitch_codehigh - pssw$v2psswitch_codelow) / 2

# NOTE(review): no seed is set, so the draws (and the CSV written below)
# differ between runs -- consider a set.seed() call if reproducibility matters.
n_draws <- 900
point_est <- pssw$v2psswitch
spread <- pssw$sd

# One column of draws per observation, built in a single preallocated pass
# (growing a data frame with rbind() row by row copies it on every iteration).
# Draws are generated observation by observation, n_draws at a time.
draws <- vapply(
  seq_len(nrow(pssw)),
  function(i) rnorm(n_draws, mean = point_est[i], sd = spread[i]),
  numeric(n_draws)
)
swt <- as.data.frame(t(draws)) # transpose: one row per observation

prswt <- cbind(pssw[, 1:2], swt)
colnames(prswt)[-(1:2)] <- paste0("v2psswitch", seq_len(n_draws))

write.csv(prswt, file = "v2psswitch.posterior_clean.csv", row.names = FALSE)
rm(pssw, swt)

### Candidate Selection ###
# Builds `cnsln2`: one row per country-year with the posterior draws for
# v2pscnslnl (columns v2pscnslnl1, v2pscnslnl2, ...), and writes it to
# "v2pscnslnl_posterior_clean.csv". Requires `vdem` to be loaded.
cnsln <- read.csv("v2pscnslnl.20000.z.sample.csv")

# The row names carry the observation id: characters 1-3 are read as the
# country code and characters 5-8 as the year.
obs_id <- rownames(cnsln)
cnsln <- cbind(
  country_text_id = substr(obs_id, 1, 3),
  year = as.numeric(substr(obs_id, 5, 8)),
  cnsln
)
cnsln$country_text_id <- as.character(cnsln$country_text_id)

# Sort ids in decreasing order so that, when a country-year appears more than
# once, duplicated() keeps the entry whose id sorts last (the latest value).
cnsln <- cnsln[order(rownames(cnsln), decreasing = TRUE), ]
cnsln <- cnsln[!duplicated(cnsln[c("country_text_id", "year")]), ]

# Country-years for which V-Dem reports v2pscnslnl define the output rows.
vars <- c("country_text_id", "year", "v2pscnslnl")
means <- vdem[which(!is.na(vdem$v2pscnslnl)), vars]
cnsln2 <- merge(means, cnsln, by = c("country_text_id", "year"),
                all.x = TRUE, all.y = FALSE)
cnsln2$v2pscnslnl <- NULL
cnsln2 <- cnsln2[order(cnsln2$country_text_id, cnsln2$year), ]

# Last Observation Carried Forward (zoo::na.locf), applied within each country.
# A plain LOCF over the stacked frame would fill a country's leading gaps with
# the last draws of whichever country happens to precede it in the sort order.
fill_within_country <- function(draw, country) {
  ave(as.numeric(draw), country,
      FUN = function(v) na.locf(v, na.rm = FALSE))
}
draw_cols <- seq_len(ncol(cnsln2))[-(1:2)]
cnsln2[draw_cols] <- lapply(cnsln2[draw_cols], fill_within_country,
                            country = cnsln2$country_text_id)

# Country-years before a country's first sampled year have nothing to carry
# forward; drop them rather than keep rows of missing draws.
cnsln2 <- cnsln2[complete.cases(cnsln2[draw_cols]), ]
colnames(cnsln2)[draw_cols] <- paste0("v2pscnslnl", seq_along(draw_cols))

# saving
write.csv(cnsln2, file = "v2pscnslnl_posterior_clean.csv", row.names = FALSE)

### Legislative Cohesion ###
# Builds `cohsv2`: one row per country-year with the posterior draws for
# v2pscohesv (columns v2pscohesv1, v2pscohesv2, ...), and writes it to
# "v2pscohesv_posterior_clean.csv". Requires `vdem` to be loaded.
cohsv <- read.csv("v2pscohesv.10000.z.sample.csv")

# The row names carry the observation id: characters 1-3 are read as the
# country code and characters 5-8 as the year.
obs_id <- rownames(cohsv)
cohsv <- cbind(
  country_text_id = substr(obs_id, 1, 3),
  year = as.numeric(substr(obs_id, 5, 8)),
  cohsv
)
cohsv$country_text_id <- as.character(cohsv$country_text_id)

# Sort ids in decreasing order so that, when a country-year appears more than
# once, duplicated() keeps the entry whose id sorts last (the latest value).
cohsv <- cohsv[order(rownames(cohsv), decreasing = TRUE), ]
cohsv <- cohsv[!duplicated(cohsv[c("country_text_id", "year")]), ]

# NOTE(review): the set of output country-years is taken from v2pscnslnl
# (candidate selection), not v2pscohesv -- confirm this shared frame is
# intentional and not a copy-paste leftover.
vars <- c("country_text_id", "year", "v2pscnslnl")
means <- vdem[which(!is.na(vdem$v2pscnslnl)), vars]
cohsv2 <- merge(means, cohsv, by = c("country_text_id", "year"),
                all.x = TRUE, all.y = FALSE)
cohsv2$v2pscnslnl <- NULL
cohsv2 <- cohsv2[order(cohsv2$country_text_id, cohsv2$year), ]

# Last Observation Carried Forward (zoo::na.locf), applied within each country.
# A plain LOCF over the stacked frame would fill a country's leading gaps with
# the last draws of whichever country happens to precede it in the sort order.
fill_within_country <- function(draw, country) {
  ave(as.numeric(draw), country,
      FUN = function(v) na.locf(v, na.rm = FALSE))
}
draw_cols <- seq_len(ncol(cohsv2))[-(1:2)]
cohsv2[draw_cols] <- lapply(cohsv2[draw_cols], fill_within_country,
                            country = cohsv2$country_text_id)

# Country-years before a country's first sampled year have nothing to carry
# forward; drop them rather than keep rows of missing draws.
cohsv2 <- cohsv2[complete.cases(cohsv2[draw_cols]), ]
colnames(cohsv2)[draw_cols] <- paste0("v2pscohesv", seq_along(draw_cols))

# saving
write.csv(cohsv2, file = "v2pscohesv_posterior_clean.csv", row.names = FALSE)

### Party Organization ###
# Builds `orgs2`: one row per country-year with the posterior draws for
# v2psorgs (columns v2psorgs1, v2psorgs2, ...), and writes it to
# "v2psorgs_posterior_clean.csv". Requires `vdem` to be loaded.
orgs <- read.csv("v2psorgs.10000.z.sample.csv")

# The row names carry the observation id: characters 1-3 are read as the
# country code and characters 5-8 as the year.
obs_id <- rownames(orgs)
orgs <- cbind(
  country_text_id = substr(obs_id, 1, 3),
  year = as.numeric(substr(obs_id, 5, 8)),
  orgs
)
orgs$country_text_id <- as.character(orgs$country_text_id)

# Sort ids in decreasing order so that, when a country-year appears more than
# once, duplicated() keeps the entry whose id sorts last (the latest value).
orgs <- orgs[order(rownames(orgs), decreasing = TRUE), ]
orgs <- orgs[!duplicated(orgs[c("country_text_id", "year")]), ]

# NOTE(review): the set of output country-years is taken from v2pscnslnl
# (candidate selection), not v2psorgs -- confirm this shared frame is
# intentional and not a copy-paste leftover.
vars <- c("country_text_id", "year", "v2pscnslnl")
means <- vdem[which(!is.na(vdem$v2pscnslnl)), vars]
orgs2 <- merge(means, orgs, by = c("country_text_id", "year"),
               all.x = TRUE, all.y = FALSE)
orgs2$v2pscnslnl <- NULL
orgs2 <- orgs2[order(orgs2$country_text_id, orgs2$year), ]

# Last Observation Carried Forward (zoo::na.locf), applied within each country.
# A plain LOCF over the stacked frame would fill a country's leading gaps with
# the last draws of whichever country happens to precede it in the sort order.
fill_within_country <- function(draw, country) {
  ave(as.numeric(draw), country,
      FUN = function(v) na.locf(v, na.rm = FALSE))
}
draw_cols <- seq_len(ncol(orgs2))[-(1:2)]
orgs2[draw_cols] <- lapply(orgs2[draw_cols], fill_within_country,
                           country = orgs2$country_text_id)

# Country-years before a country's first sampled year have nothing to carry
# forward; drop them rather than keep rows of missing draws.
orgs2 <- orgs2[complete.cases(orgs2[draw_cols]), ]
colnames(orgs2)[draw_cols] <- paste0("v2psorgs", seq_along(draw_cols))

# saving
write.csv(orgs2, file = "v2psorgs_posterior_clean.csv", row.names = FALSE)

### Party branches ###
# Builds `brchs2`: one row per country-year with the posterior draws for
# v2psprbrch (columns v2psprbrch1, v2psprbrch2, ...), and writes it to
# "v2psprbrch_posterior_clean.csv". Requires `vdem` to be loaded.
brchs <- read.csv("v2psprbrch.10000.z.sample.csv")

# The row names carry the observation id: characters 1-3 are read as the
# country code and characters 5-8 as the year.
obs_id <- rownames(brchs)
brchs <- cbind(
  country_text_id = substr(obs_id, 1, 3),
  year = as.numeric(substr(obs_id, 5, 8)),
  brchs
)
brchs$country_text_id <- as.character(brchs$country_text_id)

# Sort ids in decreasing order so that, when a country-year appears more than
# once, duplicated() keeps the entry whose id sorts last (the latest value).
brchs <- brchs[order(rownames(brchs), decreasing = TRUE), ]
brchs <- brchs[!duplicated(brchs[c("country_text_id", "year")]), ]

# NOTE(review): the set of output country-years is taken from v2pscnslnl
# (candidate selection), not v2psprbrch -- confirm this shared frame is
# intentional and not a copy-paste leftover.
vars <- c("country_text_id", "year", "v2pscnslnl")
means <- vdem[which(!is.na(vdem$v2pscnslnl)), vars]
brchs2 <- merge(means, brchs, by = c("country_text_id", "year"),
                all.x = TRUE, all.y = FALSE)
brchs2$v2pscnslnl <- NULL
brchs2 <- brchs2[order(brchs2$country_text_id, brchs2$year), ]

# Last Observation Carried Forward (zoo::na.locf), applied within each country.
# A plain LOCF over the stacked frame would fill a country's leading gaps with
# the last draws of whichever country happens to precede it in the sort order.
fill_within_country <- function(draw, country) {
  ave(as.numeric(draw), country,
      FUN = function(v) na.locf(v, na.rm = FALSE))
}
draw_cols <- seq_len(ncol(brchs2))[-(1:2)]
brchs2[draw_cols] <- lapply(brchs2[draw_cols], fill_within_country,
                            country = brchs2$country_text_id)

# Country-years before a country's first sampled year have nothing to carry
# forward; drop them rather than keep rows of missing draws.
brchs2 <- brchs2[complete.cases(brchs2[draw_cols]), ]
colnames(brchs2)[draw_cols] <- paste0("v2psprbrch", seq_along(draw_cols))

# saving
write.csv(brchs2, file = "v2psprbrch_posterior_clean.csv", row.names = FALSE)

### Programmatic linkages ###
# Builds `links2`: one row per country-year with the posterior draws for
# v2psprlnks (columns v2psprlnks1, v2psprlnks2, ...), and writes it to
# "v2psprlnks_posterior_clean.csv". Requires `vdem` to be loaded.
links <- read.csv("v2psprlnks.10000.z.sample.csv")

# The row names carry the observation id: characters 1-3 are read as the
# country code and characters 5-8 as the year.
obs_id <- rownames(links)
links <- cbind(
  country_text_id = substr(obs_id, 1, 3),
  year = as.numeric(substr(obs_id, 5, 8)),
  links
)
links$country_text_id <- as.character(links$country_text_id)

# Sort ids in decreasing order so that, when a country-year appears more than
# once, duplicated() keeps the entry whose id sorts last (the latest value).
links <- links[order(rownames(links), decreasing = TRUE), ]
links <- links[!duplicated(links[c("country_text_id", "year")]), ]

# NOTE(review): the set of output country-years is taken from v2pscnslnl
# (candidate selection), not v2psprlnks -- confirm this shared frame is
# intentional and not a copy-paste leftover.
vars <- c("country_text_id", "year", "v2pscnslnl")
means <- vdem[which(!is.na(vdem$v2pscnslnl)), vars]
links2 <- merge(means, links, by = c("country_text_id", "year"),
                all.x = TRUE, all.y = FALSE)
links2$v2pscnslnl <- NULL
links2 <- links2[order(links2$country_text_id, links2$year), ]

# Last Observation Carried Forward (zoo::na.locf), applied within each country.
# A plain LOCF over the stacked frame would fill a country's leading gaps with
# the last draws of whichever country happens to precede it in the sort order.
fill_within_country <- function(draw, country) {
  ave(as.numeric(draw), country,
      FUN = function(v) na.locf(v, na.rm = FALSE))
}
draw_cols <- seq_len(ncol(links2))[-(1:2)]
links2[draw_cols] <- lapply(links2[draw_cols], fill_within_country,
                            country = links2$country_text_id)

# Country-years before a country's first sampled year have nothing to carry
# forward; drop them rather than keep rows of missing draws.
links2 <- links2[complete.cases(links2[draw_cols]), ]
colnames(links2)[draw_cols] <- paste0("v2psprlnks", seq_along(draw_cols))

# saving (row names are omitted, as in the other posterior files)
write.csv(links2, file = "v2psprlnks_posterior_clean.csv", row.names = FALSE)

### Creating the Index
# Each of the 900 draw columns (positions 3 to 902) of every component is
# standardized; the components are then joined on country-year and averaged
# draw by draw into ps_1 ... ps_900.
draw_pos <- 3:902
standardize <- function(x) as.vector(scale(x))
flip_and_standardize <- function(x) standardize(-1 * x)

links2[draw_pos] <- lapply(links2[draw_pos], standardize)
brchs2[draw_pos] <- lapply(brchs2[draw_pos], standardize)
orgs2[draw_pos] <- lapply(orgs2[draw_pos], standardize)
cohsv2[draw_pos] <- lapply(cohsv2[draw_pos], standardize)
# Candidate selection and party switching enter with reversed sign.
cnsln2[draw_pos] <- lapply(cnsln2[draw_pos], flip_and_standardize)
prswt[draw_pos] <- lapply(prswt[draw_pos], flip_and_standardize)

# Full outer joins across the five sampled components, then a left join with
# the party-switching draws.
join_keys <- c("country_text_id", "year")
index.m <- Reduce(
  function(acc, nxt) merge(acc, nxt, by = join_keys, all = TRUE),
  list(links2, brchs2, orgs2, cohsv2, cnsln2)
)
index.m <- merge(index.m, prswt, by = join_keys, all.x = TRUE)

# Row-wise mean of the six standardized components for each draw; a missing
# component makes that country-year's value NA for the draw.
component_stems <- c("v2psprlnks", "v2psprbrch", "v2psorgs",
                     "v2pscohesv", "v2pscnslnl", "v2psswitch")
draw_means <- lapply(seq_len(900), function(k) {
  unname(rowMeans(index.m[paste0(component_stems, k)]))
})
names(draw_means) <- paste0("ps_", seq_len(900))
index <- as.data.frame(draw_means)

# Attach the country-year keys, then the point-estimate data set, which is
# keyed by country_id and therefore needs the id/text-id lookup from vdem.
index <- cbind(index.m[, 1:2], index)
vars <- c("country_id", "year", "ps", "e_migdppcln", "e_migdpgro")
key <- unique(vdem[, c("country_id", "country_text_id")])
psdata <- read.dta13("ps_data.dta")
psdata <- psdata[vars]
psdata <- merge(key, psdata, by = "country_id", all.y = TRUE)
index <- merge(psdata, index, by = c("country_text_id", "year"))

# saving
write.csv(index, file = "ps_posterior_data.csv")
write.dta(index, file = "ps_posterior_data.dta")
